{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from requests_html import HTMLSession"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "91\n"
     ]
    }
   ],
   "source": [
    "#学校要闻\n",
    "session = HTMLSession()\n",
    "r0 = session.get(\"https://www.nfu.edu.cn/xxyw/index.htm\")\n",
    "title = r0.html.xpath('//div[@class=\"news_title\"]/a/@title')\n",
    "links = r0.html.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "data = r0.html.xpath('//font[@class=\"right-more\"]/text()')\n",
    "\n",
    "for i in range(1,100):\n",
    "    r0 = session.get('https://www.nfu.edu.cn/xxyw/index'+str(i)+'.htm')\n",
    "    if r0.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>快！来为我校大学生国旗护卫队参赛点赞！</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5b71d46d3b114859ae...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>专注当下，冲刺高考，奋斗出最美的青春</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04be...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>我校召开高校教师职称评审 政策解读专题报告会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/f0002a2424f34ad8b2...</td>\n",
       "      <td>2021-04-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9a...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>我校承办首届 “新时代从商培养工程”</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69d...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3575</th>\n",
       "      <td>我院艺创系学子在广东省“和谐杯”手绘技能大赛获佳绩</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/1eca80f5d69240e491...</td>\n",
       "      <td>2013-10-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3576</th>\n",
       "      <td>我院多个项目获2013年度省级以上本科教学质量工程项目立项</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/e2ef39bdace94f3da9...</td>\n",
       "      <td>2013-10-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3577</th>\n",
       "      <td>首届从化地区学工部（处）联谊会在我院举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/5a530157f3764b32ad...</td>\n",
       "      <td>2013-09-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3578</th>\n",
       "      <td>我院经管系2013级创新实验国际班开班典礼隆重举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/2af0127ce4234c7aa5...</td>\n",
       "      <td>2013-09-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3579</th>\n",
       "      <td>学院教学工作会议顺利召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xxyw/b844901be7a6412eb7...</td>\n",
       "      <td>2013-09-26</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3580 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                   标题  \\\n",
       "0                 快！来为我校大学生国旗护卫队参赛点赞！   \n",
       "1                  专注当下，冲刺高考，奋斗出最美的青春   \n",
       "2              我校召开高校教师职称评审 政策解读专题报告会   \n",
       "3     我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会   \n",
       "4                  我校承办首届 “新时代从商培养工程”   \n",
       "...                               ...   \n",
       "3575        我院艺创系学子在广东省“和谐杯”手绘技能大赛获佳绩   \n",
       "3576    我院多个项目获2013年度省级以上本科教学质量工程项目立项   \n",
       "3577             首届从化地区学工部（处）联谊会在我院举行   \n",
       "3578        我院经管系2013级创新实验国际班开班典礼隆重举行   \n",
       "3579                     学院教学工作会议顺利召开   \n",
       "\n",
       "                                                     链结          日期  \n",
       "0     https://www.nfu.edu.cn/xxyw/5b71d46d3b114859ae...  2021-04-09  \n",
       "1     https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04be...  2021-04-02  \n",
       "2     https://www.nfu.edu.cn/xxyw/f0002a2424f34ad8b2...  2021-04-10  \n",
       "3     https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9a...  2021-04-02  \n",
       "4     https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69d...  2021-04-02  \n",
       "...                                                 ...         ...  \n",
       "3575  https://www.nfu.edu.cn/xxyw/1eca80f5d69240e491...  2013-10-14  \n",
       "3576  https://www.nfu.edu.cn/xxyw/e2ef39bdace94f3da9...  2013-10-11  \n",
       "3577  https://www.nfu.edu.cn/xxyw/5a530157f3764b32ad...  2013-09-29  \n",
       "3578  https://www.nfu.edu.cn/xxyw/2af0127ce4234c7aa5...  2013-09-29  \n",
       "3579  https://www.nfu.edu.cn/xxyw/b844901be7a6412eb7...  2013-09-26  \n",
       "\n",
       "[3580 rows x 3 columns]"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "        \n",
    "for i in range(90):\n",
    "    r0 = session.get(\"https://www.nfu.edu.cn/xxyw/index%d.htm\"%(i))\n",
    "    title.extend(r0.html.xpath('//div[@class=\"news_title\"]/a/@title')) \n",
    "    links.extend(r0.html.xpath('//div[@class=\"news_title\"]/a/@href'))\n",
    "    data.extend(r0.html.xpath('//font[@class=\"right-more\"]/text()'))\n",
    "    \n",
    "df0 = pd.DataFrame( {\n",
    "        \"标题\": title,\n",
    "        \"链结\": [\"https://www.nfu.edu.cn/xxyw/\"+i for i in links],\n",
    "        \"日期\": data,\n",
    "             } )\n",
    "df0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "df0.to_excel(\"学校要闻.xlsx\", sheet_name=\"学校要闻\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "85\n"
     ]
    }
   ],
   "source": [
    "#校园动态\n",
    "session = HTMLSession()\n",
    "r1 = session.get(\"https://www.nfu.edu.cn/xydt/index.htm\")\n",
    "title = r1.html.xpath('//div[@class=\"news_title\"]/a/@title')\n",
    "links = r1.html.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "data = r1.html.xpath('//font[@class=\"right-more\"]/text()')\n",
    "\n",
    "for i in range(1,100):\n",
    "    r1 = session.get('https://www.nfu.edu.cn/xydt/index'+str(i)+'.htm')\n",
    "    if r1.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>【国奖映像】苏绮筠：让优秀成为习惯</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/7dfe6fcd15fd495597...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>【国奖映像】陈宇：心怀热爱，奔赴梦想</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/09627d3243ee4578ac...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>护理与健康学院2020-2021第二学期团员培训课程第2讲圆满结束</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/debea203b0c84a3092...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>护理与健康学院2020-2021年度第二学期3月份团支部委员会顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/9ec16bf90e164071b6...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>商学院电子商务专业召开申请调整学位授予学科门类 专家评审会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/cf4420785b9046e998...</td>\n",
       "      <td>2021-04-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3343</th>\n",
       "      <td>勇攀高峰，展望未来——外文系新学期班长会议顺利召开</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/5912160e075c4aa9a0...</td>\n",
       "      <td>2016-03-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3344</th>\n",
       "      <td>走进政商宿舍，走近政商学子——记新学期师生宿舍走访</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/29cb4400d9e44acfbe...</td>\n",
       "      <td>2016-03-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3345</th>\n",
       "      <td>我院会计学系逢甲大学研修专班开训典礼顺利举行</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/fe516b71e79544bb80...</td>\n",
       "      <td>2016-02-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3346</th>\n",
       "      <td>寒潮不减热情 相聚更显情深——电子通信与软件工程系顺利举办校友交流会</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/30996b733cb74e0e81...</td>\n",
       "      <td>2016-01-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3347</th>\n",
       "      <td>产教结合，共享双赢——电软系赴企业考察交流</td>\n",
       "      <td>https://www.nfu.edu.cn/xydt/fbe740cdd4534c20af...</td>\n",
       "      <td>2016-01-25</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3348 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                       标题  \\\n",
       "0                       【国奖映像】苏绮筠：让优秀成为习惯   \n",
       "1                      【国奖映像】陈宇：心怀热爱，奔赴梦想   \n",
       "2       护理与健康学院2020-2021第二学期团员培训课程第2讲圆满结束   \n",
       "3     护理与健康学院2020-2021年度第二学期3月份团支部委员会顺利举行   \n",
       "4           商学院电子商务专业召开申请调整学位授予学科门类 专家评审会   \n",
       "...                                   ...   \n",
       "3343            勇攀高峰，展望未来——外文系新学期班长会议顺利召开   \n",
       "3344            走进政商宿舍，走近政商学子——记新学期师生宿舍走访   \n",
       "3345               我院会计学系逢甲大学研修专班开训典礼顺利举行   \n",
       "3346   寒潮不减热情 相聚更显情深——电子通信与软件工程系顺利举办校友交流会   \n",
       "3347                产教结合，共享双赢——电软系赴企业考察交流   \n",
       "\n",
       "                                                     链结          日期  \n",
       "0     https://www.nfu.edu.cn/xydt/7dfe6fcd15fd495597...  2021-04-09  \n",
       "1     https://www.nfu.edu.cn/xydt/09627d3243ee4578ac...  2021-04-09  \n",
       "2     https://www.nfu.edu.cn/xydt/debea203b0c84a3092...  2021-04-09  \n",
       "3     https://www.nfu.edu.cn/xydt/9ec16bf90e164071b6...  2021-04-09  \n",
       "4     https://www.nfu.edu.cn/xydt/cf4420785b9046e998...  2021-04-07  \n",
       "...                                                 ...         ...  \n",
       "3343  https://www.nfu.edu.cn/xydt/5912160e075c4aa9a0...  2016-03-01  \n",
       "3344  https://www.nfu.edu.cn/xydt/29cb4400d9e44acfbe...  2016-03-01  \n",
       "3345  https://www.nfu.edu.cn/xydt/fe516b71e79544bb80...  2016-02-29  \n",
       "3346  https://www.nfu.edu.cn/xydt/30996b733cb74e0e81...  2016-01-28  \n",
       "3347  https://www.nfu.edu.cn/xydt/fbe740cdd4534c20af...  2016-01-25  \n",
       "\n",
       "[3348 rows x 3 columns]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "for i in range(84):\n",
    "    r1 = session.get(\"https://www.nfu.edu.cn/xydt/index%d.htm\"%(i))\n",
    "    title.extend(r1.html.xpath('//div[@class=\"news_title\"]/a/@title')) \n",
    "    links.extend(r1.html.xpath('//div[@class=\"news_title\"]/a/@href'))\n",
    "    data.extend(r1.html.xpath('//font[@class=\"right-more\"]/text()'))\n",
    "    \n",
    "df1 = pd.DataFrame( {\n",
    "        \"标题\": title,\n",
    "        \"链结\": [\"https://www.nfu.edu.cn/xydt/\"+i for i in links],\n",
    "        \"日期\": data,\n",
    "             } )\n",
    "df1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "df1.to_excel(\"校园动态.xlsx\", sheet_name=\"校园动态\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "35\n"
     ]
    }
   ],
   "source": [
    "#通知公告\n",
    "session = HTMLSession()\n",
    "r2 = session.get(\"https://www.nfu.edu.cn/tzgg/index.htm\")\n",
    "title = r2.html.xpath('//div[@class=\"news_title\"]/a/@title')\n",
    "links = r2.html.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "data = r2.html.xpath('//font[@class=\"right-more\"]/text()')\n",
    "\n",
    "for i in range(1,100):\n",
    "    r2 = session.get('https://www.nfu.edu.cn/tzgg/index'+str(i)+'.htm')\n",
    "    if r2.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>校园管理部关于2021年元旦放假校园生活服务安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/cd60e06378e5449294...</td>\n",
       "      <td>2020-12-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>中山大学南方学院关于2021年元旦放假安排的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/16fcbd56eab04220b3...</td>\n",
       "      <td>2020-12-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>关于开展2020年知识产权竞赛的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/155655d4a7e74c7695...</td>\n",
       "      <td>2020-12-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>中山大学南方学院关于举办2020年预防艾滋病巡讲活动的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/f381db0e5b3e4746b3...</td>\n",
       "      <td>2020-12-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>关于开展2020年安全知识竞赛的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/ae83ecc6ce894bcb81...</td>\n",
       "      <td>2020-12-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>675</th>\n",
       "      <td>关于调整学院至中大南校区班车下车点的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/3f57418b49024903a9...</td>\n",
       "      <td>2015-04-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>676</th>\n",
       "      <td>关于调整班车运行时刻表征求意见的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/d594c06a6c174b34b8...</td>\n",
       "      <td>2015-04-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>677</th>\n",
       "      <td>学院交通信息公告</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/5568847ca8a84832b3...</td>\n",
       "      <td>2015-04-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>678</th>\n",
       "      <td>会计学系“财经名家讲坛”系列讲座之第二十五讲讲座通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/095c1bab2ccf4e98b2...</td>\n",
       "      <td>2015-04-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>679</th>\n",
       "      <td>关于2015年6月普通话水平测试的通知</td>\n",
       "      <td>https://www.nfu.edu.cn/tzgg/76e4c24581bb4a79b8...</td>\n",
       "      <td>2015-04-23</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>680 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                标题  \\\n",
       "0      校园管理部关于2021年元旦放假校园生活服务安排的通知   \n",
       "1         中山大学南方学院关于2021年元旦放假安排的通知   \n",
       "2               关于开展2020年知识产权竞赛的通知   \n",
       "3    中山大学南方学院关于举办2020年预防艾滋病巡讲活动的通知   \n",
       "4               关于开展2020年安全知识竞赛的通知   \n",
       "..                             ...   \n",
       "675           关于调整学院至中大南校区班车下车点的通知   \n",
       "676             关于调整班车运行时刻表征求意见的通知   \n",
       "677                       学院交通信息公告   \n",
       "678     会计学系“财经名家讲坛”系列讲座之第二十五讲讲座通知   \n",
       "679            关于2015年6月普通话水平测试的通知   \n",
       "\n",
       "                                                    链结          日期  \n",
       "0    https://www.nfu.edu.cn/tzgg/cd60e06378e5449294...  2020-12-25  \n",
       "1    https://www.nfu.edu.cn/tzgg/16fcbd56eab04220b3...  2020-12-17  \n",
       "2    https://www.nfu.edu.cn/tzgg/155655d4a7e74c7695...  2020-12-16  \n",
       "3    https://www.nfu.edu.cn/tzgg/f381db0e5b3e4746b3...  2020-12-03  \n",
       "4    https://www.nfu.edu.cn/tzgg/ae83ecc6ce894bcb81...  2020-12-03  \n",
       "..                                                 ...         ...  \n",
       "675  https://www.nfu.edu.cn/tzgg/3f57418b49024903a9...  2015-04-29  \n",
       "676  https://www.nfu.edu.cn/tzgg/d594c06a6c174b34b8...  2015-04-29  \n",
       "677  https://www.nfu.edu.cn/tzgg/5568847ca8a84832b3...  2015-04-28  \n",
       "678  https://www.nfu.edu.cn/tzgg/095c1bab2ccf4e98b2...  2015-04-28  \n",
       "679  https://www.nfu.edu.cn/tzgg/76e4c24581bb4a79b8...  2015-04-23  \n",
       "\n",
       "[680 rows x 3 columns]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "for i in range(34):\n",
    "    r2 = session.get(\"https://www.nfu.edu.cn/tzgg/index%d.htm\"%(i))\n",
    "    title.extend(r2.html.xpath('//div[@class=\"news_title\"]/a/@title')) \n",
    "    links.extend(r2.html.xpath('//div[@class=\"news_title\"]/a/@href'))\n",
    "    data.extend(r2.html.xpath('//font[@class=\"right-more\"]/text()'))\n",
    "    \n",
    "df2 = pd.DataFrame( {\n",
    "        \"标题\": title,\n",
    "        \"链结\": [\"https://www.nfu.edu.cn/tzgg/\"+i for i in links],\n",
    "        \"日期\": data,\n",
    "             } )\n",
    "df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2.to_excel(\"通知公告.xlsx\", sheet_name=\"通知公告\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22\n"
     ]
    }
   ],
   "source": [
    "#招投标\n",
    "session = HTMLSession()\n",
    "r3 = session.get(\"https://www.nfu.edu.cn/ztb/index.htm\")\n",
    "title = r3.html.xpath('//div[@class=\"news_title\"]/a/@title')\n",
    "links = r3.html.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "data = r3.html.xpath('//font[@class=\"right-more\"]/text()')\n",
    "\n",
    "for i in range(1,100):\n",
    "    r3 = session.get('https://www.nfu.edu.cn/ztb/index'+str(i)+'.htm')\n",
    "    if r3.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目招标开标延期公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/4aa14103a6d34d42837...</td>\n",
       "      <td>2021-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目 招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/ea8754261f26419080a...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>中山大学南方学院数字电路基础实验室、电路与模拟电子实验室设备采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/7226fe9acf3b4757b97...</td>\n",
       "      <td>2021-03-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>中山大学南方学院垃圾清运和处理服务项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/414b2db5e6c04f99be1...</td>\n",
       "      <td>2021-03-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>中山大学南方学院2021年度维修、改造工程施工项目中标结果公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/60c660848ef44283bca...</td>\n",
       "      <td>2021-03-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>415</th>\n",
       "      <td>中山大学南方学院人体解剖实验室采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/44ef936e27cb4456b2e...</td>\n",
       "      <td>2015-05-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>416</th>\n",
       "      <td>中山大学南方学院招待所改造设计采购项目招标公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/b74775af0f294621964...</td>\n",
       "      <td>2015-05-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>417</th>\n",
       "      <td>中山大学南方学院视频监控系统项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/26e7011c300e44d295c...</td>\n",
       "      <td>2015-05-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>418</th>\n",
       "      <td>中山大学南方学院人体解剖实验室设备采购项目中标公示</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/d64d62048b704ec4b57...</td>\n",
       "      <td>2015-04-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>419</th>\n",
       "      <td>中山大学南方学院学生体质健康测试仪采购项目中标（成交）结果公告</td>\n",
       "      <td>https://www.nfu.edu.cn/ztb/aebfa587e06a4609a6d...</td>\n",
       "      <td>2015-04-27</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>420 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                         标题  \\\n",
       "0       广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目招标开标延期公告   \n",
       "1          广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目 招标公告   \n",
       "2    中山大学南方学院数字电路基础实验室、电路与模拟电子实验室设备采购项目招标公告   \n",
       "3                   中山大学南方学院垃圾清运和处理服务项目招标公告   \n",
       "4           中山大学南方学院2021年度维修、改造工程施工项目中标结果公示   \n",
       "..                                      ...   \n",
       "415           中山大学南方学院人体解剖实验室采购项目中标（成交）结果公告   \n",
       "416                 中山大学南方学院招待所改造设计采购项目招标公告   \n",
       "417                    中山大学南方学院视频监控系统项目中标公示   \n",
       "418               中山大学南方学院人体解剖实验室设备采购项目中标公示   \n",
       "419         中山大学南方学院学生体质健康测试仪采购项目中标（成交）结果公告   \n",
       "\n",
       "                                                    链结          日期  \n",
       "0    https://www.nfu.edu.cn/ztb/4aa14103a6d34d42837...  2021-04-08  \n",
       "1    https://www.nfu.edu.cn/ztb/ea8754261f26419080a...  2021-04-02  \n",
       "2    https://www.nfu.edu.cn/ztb/7226fe9acf3b4757b97...  2021-03-31  \n",
       "3    https://www.nfu.edu.cn/ztb/414b2db5e6c04f99be1...  2021-03-17  \n",
       "4    https://www.nfu.edu.cn/ztb/60c660848ef44283bca...  2021-03-11  \n",
       "..                                                 ...         ...  \n",
       "415  https://www.nfu.edu.cn/ztb/44ef936e27cb4456b2e...  2015-05-04  \n",
       "416  https://www.nfu.edu.cn/ztb/b74775af0f294621964...  2015-05-04  \n",
       "417  https://www.nfu.edu.cn/ztb/26e7011c300e44d295c...  2015-05-03  \n",
       "418  https://www.nfu.edu.cn/ztb/d64d62048b704ec4b57...  2015-04-28  \n",
       "419  https://www.nfu.edu.cn/ztb/aebfa587e06a4609a6d...  2015-04-27  \n",
       "\n",
       "[420 rows x 3 columns]"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "for i in range(21):\n",
    "    r3 = session.get(\"https://www.nfu.edu.cn/ztb/index%d.htm\"%(i))\n",
    "    title.extend(r3.html.xpath('//div[@class=\"news_title\"]/a/@title')) \n",
    "    links.extend(r3.html.xpath('//div[@class=\"news_title\"]/a/@href'))\n",
    "    data.extend(r3.html.xpath('//font[@class=\"right-more\"]/text()'))\n",
    "    \n",
    "df3 = pd.DataFrame( {\n",
    "        \"标题\": title,\n",
    "        \"链结\": [\"https://www.nfu.edu.cn/ztb/\"+i for i in links],\n",
    "        \"日期\": data,\n",
    "             } )\n",
    "df3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "df3.to_excel(\"招投标.xlsx\", sheet_name=\"招投标\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26\n"
     ]
    }
   ],
   "source": [
    "#高教动态\n",
    "session = HTMLSession()\n",
    "r4 = session.get(\"https://www.nfu.edu.cn/gjdt/index.htm\")\n",
    "title = r4.html.xpath('//div[@class=\"news_title\"]/a/@title')\n",
    "links = r4.html.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "data = r4.html.xpath('//font[@class=\"right-more\"]/text()')\n",
    "\n",
    "for i in range(1,100):\n",
    "    r4 = session.get('https://www.nfu.edu.cn/gjdt/index'+str(i)+'.htm')\n",
    "    if r4.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>链结</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>教育部党组《求是》撰文：精心谋划 切实抓好教育系统党史学习教育</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/309be8b078444044b5...</td>\n",
       "      <td>2021-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>教育部长陈宝生：把巩固拓展作为开局之年工作主题，做到6个到位</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/159b20971f8b4051ba...</td>\n",
       "      <td>2021-03-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>如何建设高质量教育体系？“十四五”规划和2035年远景目标纲要明确了</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/27ba495edc1b49f88b...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>教育部长陈宝生《旗帜》撰文：建设高质量教育体系，加快建成教育强国</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/20dc120c250642cca5...</td>\n",
       "      <td>2021-01-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>重磅！《推进粤港澳大湾区高等教育合作发展规划》正式印发</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b43531427fb44695bb...</td>\n",
       "      <td>2020-12-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>495</th>\n",
       "      <td>教育部部署2014年高等学校科技改革重点工作</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/b9331db1468b4c2783...</td>\n",
       "      <td>2014-04-18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>496</th>\n",
       "      <td>高等教育国际化须警惕过度商业化</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/e2a83d35ce63437e81...</td>\n",
       "      <td>2014-04-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>497</th>\n",
       "      <td>广东出台“特支计划”重点遴选培养杰出人才</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/a10bea05e10f4940b4...</td>\n",
       "      <td>2014-04-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>498</th>\n",
       "      <td>75所高校毕业生就业质量报告：须完善规范和标准</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/da3b772713c34cb084...</td>\n",
       "      <td>2014-04-14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>分类、开放将成中国高校科研评价方向</td>\n",
       "      <td>https://www.nfu.edu.cn/gjdt/fcfca6a4203f4b5586...</td>\n",
       "      <td>2014-04-14</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>500 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                     标题  \\\n",
       "0       教育部党组《求是》撰文：精心谋划 切实抓好教育系统党史学习教育   \n",
       "1        教育部长陈宝生：把巩固拓展作为开局之年工作主题，做到6个到位   \n",
       "2    如何建设高质量教育体系？“十四五”规划和2035年远景目标纲要明确了   \n",
       "3      教育部长陈宝生《旗帜》撰文：建设高质量教育体系，加快建成教育强国   \n",
       "4           重磅！《推进粤港澳大湾区高等教育合作发展规划》正式印发   \n",
       "..                                  ...   \n",
       "495              教育部部署2014年高等学校科技改革重点工作   \n",
       "496                     高等教育国际化须警惕过度商业化   \n",
       "497                广东出台“特支计划”重点遴选培养杰出人才   \n",
       "498             75所高校毕业生就业质量报告：须完善规范和标准   \n",
       "499                   分类、开放将成中国高校科研评价方向   \n",
       "\n",
       "                                                    链结          日期  \n",
       "0    https://www.nfu.edu.cn/gjdt/309be8b078444044b5...  2021-04-08  \n",
       "1    https://www.nfu.edu.cn/gjdt/159b20971f8b4051ba...  2021-03-20  \n",
       "2    https://www.nfu.edu.cn/gjdt/27ba495edc1b49f88b...  2021-03-15  \n",
       "3    https://www.nfu.edu.cn/gjdt/20dc120c250642cca5...  2021-01-05  \n",
       "4    https://www.nfu.edu.cn/gjdt/b43531427fb44695bb...  2020-12-22  \n",
       "..                                                 ...         ...  \n",
       "495  https://www.nfu.edu.cn/gjdt/b9331db1468b4c2783...  2014-04-18  \n",
       "496  https://www.nfu.edu.cn/gjdt/e2a83d35ce63437e81...  2014-04-17  \n",
       "497  https://www.nfu.edu.cn/gjdt/a10bea05e10f4940b4...  2014-04-14  \n",
       "498  https://www.nfu.edu.cn/gjdt/da3b772713c34cb084...  2014-04-14  \n",
       "499  https://www.nfu.edu.cn/gjdt/fcfca6a4203f4b5586...  2014-04-14  \n",
       "\n",
       "[500 rows x 3 columns]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "for i in range(25):\n",
    "    r4 = session.get(\"https://www.nfu.edu.cn/gjdt/index%d.htm\"%(i))\n",
    "    title.extend(r4.html.xpath('//div[@class=\"news_title\"]/a/@title')) \n",
    "    links.extend(r4.html.xpath('//div[@class=\"news_title\"]/a/@href'))\n",
    "    data.extend(r4.html.xpath('//font[@class=\"right-more\"]/text()'))\n",
    "    \n",
    "df4 = pd.DataFrame( {\n",
    "        \"标题\": title,\n",
    "        \"链结\": [\"https://www.nfu.edu.cn/gjdt/\"+i for i in links],\n",
    "        \"日期\": data,\n",
    "             } )\n",
    "df4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
